import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# Add the directory passed as the first command-line argument to the module
# search path. The same argument is used further down as the data directory.
# NOTE(review): raises IndexError when the script is run without arguments.
sys.path.append(sys.argv[1])

import pandas as pd  
import pickle
  
# Read the customer table from the directory given as the first CLI argument.
csv_path = os.path.join(sys.argv[1], 'credit_customers.csv')
credit_customers = pd.read_csv(csv_path)

# Upper-quartile cut-offs for the loan amount and the loan duration.
credit_amount_75th = credit_customers['credit_amount'].quantile(0.75)
duration_75th = credit_customers['duration'].quantile(0.75)

# Keep only the rows that lie strictly above BOTH cut-offs.
is_high_credit = credit_customers['credit_amount'].gt(credit_amount_75th)
is_long_duration = credit_customers['duration'].gt(duration_75th)
high_credit_long_duration = credit_customers.loc[is_high_credit & is_long_duration]

# Project onto the two columns of interest; the DataFrame index serves as the
# client identifier.
result = high_credit_long_duration.loc[:, ['credit_amount', 'duration']]

# One plain tuple per row: (index, credit_amount, duration).
result_list = list(result.itertuples(name=None))

print(result_list)
# pickle.dump(result_list,open("./ref_result/result_list_1.pkl","wb"))

import pandas as pd  
import pickle 
  
# Within the high-credit / long-duration subset, select the rows whose
# credit history is recorded as 'delayed previously'.
was_delayed = high_credit_long_duration['credit_history'].eq('delayed previously')
late_payments = high_credit_long_duration.loc[was_delayed]

# The DataFrame index serves as the client identifier.
result = late_payments.index

# Plain Python list of those identifiers.
result_list = result.tolist()

print(result_list)
# pickle.dump(result_list,open("./ref_result/result_list_2.pkl","wb"))

import pandas as pd  
import pickle 
  
# Within the high-credit / long-duration subset, select clients who hold more
# than one existing credit AND whose installment commitment is above the
# median installment commitment of the full customer table.
has_multiple_credits = high_credit_long_duration['existing_credits'].gt(1)
subset_commitment = high_credit_long_duration['installment_commitment']
median_commitment = credit_customers['installment_commitment'].median()
has_high_commitment = subset_commitment.gt(median_commitment)
multiple_credits_high_commitment = high_credit_long_duration.loc[
    has_multiple_credits & has_high_commitment
]

# The DataFrame index serves as the client identifier.
result = multiple_credits_high_commitment.index
result_list = result.tolist()

print(result_list)
# pickle.dump(result_list,open("./ref_result/result_list_3.pkl","wb"))

import pandas as pd  
import pickle 
  
# Within the high-credit / long-duration subset, keep clients whose age lies
# in the closed interval [25, 55].
age_in_range = high_credit_long_duration['age'].between(25, 55)
clients_25_to_55 = high_credit_long_duration.loc[age_in_range]

# Number of clients in that age band.
result_count = len(clients_25_to_55)

print(result_count)
# pickle.dump(result_count,open("./ref_result/result_count_1.pkl","wb"))

import pandas as pd  
import pickle 
  
# Within the high-credit / long-duration subset, keep clients whose employment
# bracket is one of the two "4 years or more" categories.
has_stable_job = high_credit_long_duration['employment'].isin(['4<=X<7', '>=7'])
stable_employment = high_credit_long_duration.loc[has_stable_job]

# The DataFrame index serves as the client identifier.
result = stable_employment.index
result_list = result.tolist()

print(result_list)
# pickle.dump(result_list,open("./ref_result/result_list_4.pkl","wb"))

import pandas as pd  
import pickle 
  
# Among the stably employed clients, keep those whose housing is either
# rented or owned.
rents_or_owns = stable_employment['housing'].isin(['rent', 'own'])
rented_owned_housing = stable_employment.loc[rents_or_owns]

# Number of such clients.
result_count = len(rented_owned_housing)

print(result_count)
# pickle.dump(result_count,open("./ref_result/result_count_2.pkl","wb"))



import pandas as pd  
import pickle 
  
# Summarise the stably employed clients: mean credit amount, mean loan
# duration and the most frequent employment bracket. Each value is printed
# and then pickled under ./ref_result/.

# Mean credit amount and mean loan duration of the subset.
average_credit_amount = stable_employment['credit_amount'].mean()
average_loan_duration = stable_employment['duration'].mean()

# mode() returns every value tied for most frequent; take the first one.
# An empty subset has no mode, so fall back to None instead of raising
# IndexError on .iloc[0].
employment_modes = stable_employment['employment'].mode()
most_common_employment = employment_modes.iloc[0] if not employment_modes.empty else None

# Make sure the output directory exists before writing into it.
os.makedirs("./ref_result", exist_ok=True)

# Print the summary of common characteristics. Every pickle file is written
# inside a `with` block so that its handle is flushed and closed right away.
print("Average credit amount:", average_credit_amount)
with open("./ref_result/average_credit_amount.pkl", "wb") as _out:
    pickle.dump(average_credit_amount, _out)

print("Average loan duration:", average_loan_duration)
with open("./ref_result/average_loan_duration.pkl", "wb") as _out:
    pickle.dump(average_loan_duration, _out)

print("Most common employment status:", most_common_employment)
with open("./ref_result/most_common_employment.pkl", "wb") as _out:
    pickle.dump(most_common_employment, _out)
